This document will try to approach Zoom’s historical analysis from a quantitative perspective, it is meant to be a complementary document to the case write app & excel worksheet submitted by Team-7.

Data Wrangling

# transform data into the correct format
df_tall[, c("Parameter", "Ticker", "Year")] <- lapply(df_tall[, c("Parameter", "Ticker", "Year")], as.factor)

# show the final data format
str(df_tall)
## 'data.frame':    780 obs. of  4 variables:
##  $ Parameter: Factor w/ 12 levels " AP/Revenues ",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ Ticker   : Factor w/ 13 levels "EGHT","FIVN",..: 13 13 13 13 13 12 12 12 12 12 ...
##  $ Year     : Factor w/ 5 levels "2018","2019",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ Value    : num  -0.03 0.03 0.01 0.16 0.22 0.05 0 0 0.01 0 ...

Wrangle data in preparation for the clustering analysis

# spread data around year
df <- spread(df_tall, key = Year, value = Value)


# split the df into a df_list around parameters while dropping the parameter column
dfl <- split(df, f = df$Parameter)
dfl <- lapply(dfl, function(x) subset(x, select = -Parameter))

summary(dfl)
##                             Length Class      Mode
##  AP/Revenues                6      data.frame list
##  AR/Revenues                6      data.frame list
##  Capital Turnover           6      data.frame list
##  Cash/Revenue               6      data.frame list
##  COGS/Revenue               6      data.frame list
##  EBIT Margin                6      data.frame list
##  Inventories/Revenue        6      data.frame list
##  NPP&E/Revenues             6      data.frame list
##  Prepaid Expenses/Revenues  6      data.frame list
##  ROIC                       6      data.frame list
##  SG&A/Revenue               6      data.frame list
##  WCR/Revenues               6      data.frame list
str(dfl$` AP/Revenues `)
## 'data.frame':    13 obs. of  6 variables:
##  $ Ticker: Factor w/ 13 levels "EGHT","FIVN",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ 2018  : num  0.12 0.03 0.27 0.01 0.11 0.12 0.06 0.02 0.07 0.02 ...
##  $ 2019  : num  0.15 0.03 0.27 0.03 0.11 0.11 0.01 0.04 0.08 0.03 ...
##  $ 2020  : num  0.2 0.04 0.27 0.01 0.09 0.16 0.01 0.05 0.08 0.02 ...
##  $ 2021  : num  0.11 0.03 0.27 0.1 0.11 0.19 0.03 0.04 0.1 0.02 ...
##  $ 2022  : num  0.17 0.03 0.27 0.08 0 0.22 0.03 0.03 0.12 0.03 ...
# use ticker name as row column for each df in dfl & drop ticker column
for (item in 1:length(dfl)) {
  rownames(dfl[[item]]) <- dfl[[item]]$Ticker
  dfl[[item]]$Ticker <- NULL
}

head(dfl$` EBIT Margin `, 7)
##       2018  2019  2020  2021  2022
## EGHT -0.11 -0.25 -0.36 -0.27 -0.24
## FIVN  0.03  0.01 -0.03 -0.09 -0.11
## NICE  0.14  0.15  0.15  0.14  0.15
## PLTR -0.08 -0.27 -1.07 -0.78 -1.05
## QUMU -0.28 -0.21 -0.20 -0.76  0.00
## RBBN -0.05  0.01  0.04  0.02 -0.04
## RBLX -0.27 -0.15 -0.29 -0.26 -0.42
#build the a clusterization function
build_cluster_map <- function(dataframe, number_of_clusters) {
  optimum_cluster <- kmeans(dataframe, 
                            centers = number_of_clusters, 
                            nstart = 25)
  cluster_df <- as.data.frame(optimum_cluster$cluster)
  colnames(cluster_df)[1] <- "Cluster"
  
  return (
    list(
      cluster_table = cluster_df, 
      visualization = fviz_cluster(optimum_cluster, data = dataframe)
    )
  )
}


analyize_cluster <- function(dataframe, number_of_clusters = 3){
  k_max <- 10
  
  # Average Silhouette Width Analysis
  p1 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "silhouette") + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Average Silhouette Width") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")

  # Total Within Sum of Squares Analysis
  p2 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "wss") + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Total Within Sum of Squares") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
  
  # Gap Statistics (k) Analysis
  gap_stat <- clusGap(dataframe, FUN = kmeans, nstart = 25, K.max = k_max, B = 50)
  p3 <- fviz_gap_stat(gap_stat) + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Gap Statistics (k)") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")

  
  # Cluster Analysis
  cluster_map <- build_cluster_map(dataframe, number_of_clusters)
  
  p4 <-  cluster_map$visualization + 
    theme_minimal() + 
    ggtitle(paste("Cluster Plot for",number_of_clusters,"Clusters"))
  
  p5 <- ggplot() + 
    theme_minimal() + 
    annotation_custom(tableGrob(cluster_map$cluster_table))

  
  ggarrange(
    ggarrange(p1, p2, p3, widths = c(1,1), ncol = 3), 
    ggarrange(p4, p5, widths = c(2,1), ncol = 2), 
    nrow = 2, labels = c("A","B"), heights = c(2,3)
    )
  
}

Analyizing Financial Ratios

Analyizing All Ratios’ Principle Components

In this section we will utilize principle components to reduce the 12 financial ratios into two dimensions, we understand that this may cause loss of information due the reduction method, but our hypothesis is that performing clustering analysis on the principle components may reveal association between Zoom & other companies that we can’t see by analyzing individual financial ratio.

# spread data around Parameter
dfw <- pivot_wider(df_tall, names_from = Parameter, values_from = Value)


dfw$Year <- factor(dfw$Year, levels = c(2018, 2019, 2020, 2021, 2022))

dfw <- as.data.frame(dfw)

# split the df into a df_list around Year while dropping the Year column
dfwl <- split(dfw, f = dfw$Year)
dfwl <- lapply(dfwl, function(x) subset(x, select = -Year))

summary(dfwl)
##      Length Class      Mode
## 2018 13     data.frame list
## 2019 13     data.frame list
## 2020 13     data.frame list
## 2021 13     data.frame list
## 2022 13     data.frame list
head(dfwl$'2018', 7)
##    Ticker  ROIC   EBIT Margin   Capital Turnover   COGS/Revenue   SG&A/Revenue 
## 1      ZM  -0.03         -0.03               1.01           0.20           0.72
## 6      VG   0.05          0.05               0.96           0.41           0.48
## 11   TWLO  -0.13         -0.18               0.73           0.46           0.45
## 16   TEAM  -0.05         -0.05               0.45           0.20           0.38
## 21   SPLK  -0.13         -0.14               0.93           0.20           0.72
## 26    RNG  -0.02         -0.02               0.97           0.23           0.64
## 31   RBLX  -0.45         -0.27               1.68           0.22           0.23
##     NPP&E/Revenues   Cash/Revenue   AR/Revenues   Inventories/Revenue 
## 1              0.09           0.92          0.16                     0
## 6              0.05           0.00          0.07                     0
## 11             0.10           1.15          0.15                     0
## 16             0.06           1.97          0.07                     0
## 21             0.18           0.89          0.30                     0
## 26             0.22           0.84          0.14                     0
## 31             0.29           0.87          0.16                     0
##     Prepaid Expenses/Revenues   WCR/Revenues   AP/Revenues 
## 1                         0.13           0.84          0.02
## 6                         0.00          -0.03          0.05
## 11                        0.04           1.23          0.13
## 16                        0.02           1.62          0.02
## 21                        0.00           0.91          0.07
## 26                        0.02           0.97          0.02
## 31                        0.03           0.49          0.06
# build row name & scale all values
for (item in 1:length(dfwl)) {
  rownames(dfwl[[item]]) <- dfwl[[item]]$Ticker
  # use ticker name as row column for each df in dfl & drop ticker column
  dfwl[[item]]$Ticker <- NULL
  for (column in 1:length(dfwl[[item]])) {
    dfwl[[item]][[column]] <- scale(dfwl[[item]][[column]])
  }
}

head(dfwl$'2018', 7)
##            ROIC   EBIT Margin   Capital Turnover   COGS/Revenue   SG&A/Revenue 
## ZM    0.57805435     0.3814197        -0.05057769     -1.0834921      1.2945956
## VG    0.97883871     1.0425472        -0.16799018      1.0755834     -0.2589191
## TWLO  0.07707391    -0.8581944        -0.70808763      1.5896490     -0.4531084
## TEAM  0.47785827     0.2161378        -1.36559756     -1.0834921     -0.9062169
## SPLK  0.07707391    -0.5276306        -0.23843767     -1.0834921      1.2945956
## RNG   0.62815240     0.4640606        -0.14450768     -0.7750528      0.7767573
## RBLX -1.52606350    -1.6019628         1.52274966     -0.8778659     -1.8771636
##       NPP&E/Revenues   Cash/Revenue   AR/Revenues   Inventories/Revenue 
## ZM        -0.22226978      0.3402703    0.04632342            -0.2773501
## VG        -0.72479275     -1.4214754   -0.85698318            -0.2773501
## TWLO      -0.09663903      0.7807067   -0.05404398            -0.2773501
## TEAM      -0.59916201      2.3509583   -0.85698318            -0.2773501
## SPLK       0.90840692      0.2828220    1.45146701            -0.2773501
## RNG        1.41092989      0.1870750   -0.15441138            -0.2773501
## RBLX       2.29034510      0.2445232    0.04632342            -0.2773501
##       Prepaid Expenses/Revenues   WCR/Revenues   AP/Revenues 
## ZM                     3.0705979      0.4999849    -0.8220927
## VG                    -0.5582905     -1.2507363    -0.4057081
## TWLO                   0.5582905      1.2847910     0.7046509
## TEAM                   0.0000000      2.0695970    -0.8220927
## SPLK                  -0.5582905      0.6408475    -0.1281183
## RNG                    0.0000000      0.7615869    -0.8220927
## RBLX                   0.2791453     -0.2043282    -0.2669132

2018 Cluster Analysis

df_to_analyze <- dfwl$'2018'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

2019 Cluster Analysis

df_to_analyze <- dfwl$'2019'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

The two graphs above highlights that ZM has shared a cluster with SPLK & RNG during 2018, but it is having its own cluster in 2019. It is expected for Zoom to drift in its similarities post IPO, hence, we will run the lumsum analysis again with more recent year.

2022 Cluster Analysis

df_to_analyze <- dfwl$'2022'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

We have noticed that ZM is again is having its own cluster during 2022, which suggests that ZM have some uniquiness in its financial ratios compared to other companies in the sample, we will try to uncover that relationship in the next section by limiting the parameters of the principle components analysis.

Analyizing key Ratios’ Principle Components

In this section we will rerun the principle components analysis on ROIC, EBIT Margin, and Capital Turnover parameters only.

# build a limited data frame

dfwll <- dfwl


# build row name & scale all values
for (item in 1:length(dfwll)) {
  
  dfwll[[item]] <- select(dfwll[[item]], 
                          -c(' COGS/Revenue ',
                             ' SG&A/Revenue ', 
                             ' NPP&E/Revenues ', 
                             ' Cash/Revenue ', 
                             ' AR/Revenues ', 
                             ' Inventories/Revenue ', 
                             ' Prepaid Expenses/Revenues ', 
                             ' WCR/Revenues ', 
                             ' AP/Revenues '
                             )
                          )
}


head(dfwll$'2019', 7)
##           ROIC   EBIT Margin   Capital Turnover 
## ZM    0.8957509     0.8238972         1.11702451
## VG    0.6814301     0.7529656         0.06848243
## TWLO  0.2527886    -1.6587069        -1.45487116
## TEAM  0.3956691     0.3273764        -0.30741002
## SPLK  0.1099081    -0.3110075        -0.88114059
## RNG   0.3956691     0.3273764        -0.40632908
## RBLX -1.3903373    -0.3819391         1.90837703

2018 Cluster Analysis

df_to_analyze <- dfwll$'2018'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

2019 Cluster Analysis

df_to_analyze <- dfwll$'2019'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

2020 Cluster Analysis

df_to_analyze <- dfwll$'2020'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

2021 Cluster Analysis

df_to_analyze <- dfwll$'2021'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

2022 Cluster Analysis

df_to_analyze <- dfwll$'2022'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

The analysis above suggests that principle components cauterization over ROIC, EBIT Margin, and Capital Turnover suggests that ZM are having it’s own cluster in 2019, while it shared the cluster with VG, TEAM, RNG, RBBN,FIVN in 2018, yet that cluster has changed over the following years 2020-2022. Hence, we will be extracting the cluster data to Excel to run another cluster analysis on the combined grouping to identify the best compaines to to be considered as ZM comps.

Analyizing Indvidual Ratio

Account Payable to Revenue

df_to_analyze <- dfl$` AP/Revenues `

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

Account Receivable to Revenue

df_to_analyze <- dfl$` AR/Revenues `

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

Capital Turnover

df_to_analyze <- dfl$' Capital Turnover '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

Cash to Revenue

df_to_analyze <- dfl$' Cash/Revenue '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

EBITDA Margin

df_to_analyze <- dfl$' EBIT Margin '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

NPPE to Revenue

df_to_analyze <- dfl$' NPP&E/Revenues '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

ROIC

df_to_analyze <- dfl$' ROIC '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

SG&A to Revenue

df_to_analyze <- dfl$' SG&A/Revenue '

# identify the optimum cluster size
analyize_cluster(df_to_analyze,4 )

WCR to Revenue

df_to_analyze <- dfl$' WCR/Revenues '

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)